library(pacman)

p_load(data.table, edgeR, ggplot2, hrbrthemes, viridis, tidyverse, scales)

doc_theme <- theme_ipsum(
    base_family = "Arial", 
    caption_margin = 12,
    axis_title_size = 12,
    axis_col = "black")

curated_names <- fread(
    "../../curated_names.tsv")

aba_bed <- fread(
    "../../CP046654.1.bed",
    col.names = c(
        "chromosome",
        "left",
        "right",
        "locus_tag",
        "gene_name",
        "strand",
        "coding",
        "completeness"))

aba <- fread(
    "../../all_counts_seal.tsv.gz",
    header = FALSE,
    col.names = c(
        "spacer",
        "count",
        "condition"))

aba_key <- fread(
    "../../aba_key.tsv")

aba_design <- fread(
    "../../ABA1_experimental_design.tsv",
    na.strings = c("NA"))

aba_genome <- aba_bed[
    aba_key[, .(spacer, type, locus_tag, y_pred, target, offset)], 
    on = .(locus_tag)]

# define the experimental design space to only take into consideration "tubes"
aba_design <- aba_design[experiment == "tube"]

publication_design <- copy(aba_design)

# publication_design[, timing := gsub("T", "t", timing)]

publication_design[, rep := paste0("(", rep, ")")]

# keep only the counts that are in the experimental design space
aba <- aba[condition %in% aba_design$condition]

# convert single column into a table 
aba_grid <- 
    data.table::dcast(
        aba, 
        spacer ~ factor(condition, levels = unique(condition)),
        value.var = "count", 
        fill = 0)

aba_grid_matrix <- 
    data.matrix(aba_grid[, -c("spacer")])

row.names(aba_grid_matrix) <- aba_grid$spacer
d <- dist(t(cpm(aba_grid_matrix)), method = "canberra")

d_fit <- cmdscale(d,eig = TRUE, k = 2) # k is the number of dim

d_scale <- data.table(d_fit$points, keep.rownames = "condition")

d_scale <- publication_design[d_scale, on = .(condition)]

d_scale[, Timing := timing]

d_scale[, Chemical := paste(drug, dose, "ug/mL")]

setnames(d_scale, c("V1", "V2"), c("Dimension 1", "Dimension 2"))

this_title <- "MDS by Sample"

plot_object <- d_scale %>% 
    arrange(dose) %>%
    mutate(Chemical = factor(Chemical, levels = unique(Chemical))) %>%
    ggplot(
    aes(
        x = `Dimension 1`, 
        y = `Dimension 2`,
        fill = Chemical,
        shape = Timing)) +
    geom_point(size = 5) +
    doc_theme +
    # ggtitle(this_title) +
    scale_fill_manual(
        values = alpha(
            c(
                "grey",    # no drug
                "#a6cee3", # Imipenem 0.06 
                "#1f78b4", # Imipenem 0.09
                "#b2df8a", # Meropenem 0.11
                "#33a02c", # Meropenem 0.17
                "#e31a1c", # Rifampicin 0.34 
                "#ff7f00"  # Colistin 0.44 
                ),
            0.50)) +
    scale_shape_manual(
        name = "Timing", 
        values = c(21, 22, 23)) +
    guides(
        fill = guide_legend(override.aes = list(shape = 21, stroke = 0), order = 1))

print(plot_object)

ggsave("MDS.svg", width = 6, height = 4, dpi = 600, units = "in", device='svg')
samples_T1 <-
    dcast(melt(aba_grid, id.vars = "spacer")[variable %in% aba_design[timing == "T1", condition]], spacer ~ variable, value.var = "value")

samples_T1_matrix <- data.matrix(samples_T1[, -1])

rownames(samples_T1_matrix) <- samples_T1$spacer

d <- dist(t(cpm(samples_T1_matrix)), method = "canberra")

d_fit <- cmdscale(d,eig = TRUE, k = 2) # k is the number of dim

d_scale <- data.table(d_fit$points, keep.rownames = "condition")

d_scale <- publication_design[d_scale, on = .(condition)]

d_scale[, Timing := timing]

d_scale[, Chemical := paste(drug, dose, "ug/mL")]

setnames(d_scale, c("V1", "V2"), c("Dimension 1", "Dimension 2"))

this_title <- "MDS by Sample at t1"

plot_object <- d_scale %>%
    arrange(dose) %>%
    mutate(Chemical = factor(Chemical, levels = unique(Chemical))) %>%
    ggplot(
    aes(
        x = `Dimension 1`, 
        y = `Dimension 2`,
        fill = Chemical)) +
    geom_point(size = 5, shape = 22) +
    doc_theme +
    ggtitle(this_title) +
scale_fill_manual(
        values = alpha(
            c(
                "grey",
                "#a6cee3",
                "#1f78b4",
                "#b2df8a",
                "#33a02c",
                "#e31a1c",
                "#ff7f00"),
            0.35)) +
    guides(
        fill = guide_legend(
            override.aes = list(shape = 22),
            order = 1))

print(plot_object)

ggsave("MDS_T1.svg", width = 6, height = 4, dpi = 600, units = "in", device='svg')
samples_T1 <-
    dcast(melt(aba_grid, id.vars = "spacer")[variable %in% aba_design[timing == "T2", condition]], spacer ~ variable, value.var = "value")

samples_T1_matrix <- data.matrix(samples_T1[, -1])

rownames(samples_T1_matrix) <- samples_T1$spacer

d <- dist(t(cpm(samples_T1_matrix)), method = "canberra")

d_fit <- cmdscale(d,eig = TRUE, k = 2) # k is the number of dim

d_scale <- data.table(d_fit$points, keep.rownames = "condition")

d_scale <- publication_design[d_scale, on = .(condition)]

d_scale[, Timing := timing]

d_scale[, Chemical := paste(drug, dose, "ug/mL")]

setnames(d_scale, c("V1", "V2"), c("Dimension 1", "Dimension 2"))

this_title <- "MDS by Sample at t2"

plot_object <- d_scale %>%
    arrange(dose) %>%
    mutate(Chemical = factor(Chemical, levels = unique(Chemical))) %>%
    ggplot(
    aes(
        x = `Dimension 1`, 
        y = `Dimension 2`,
        fill = Chemical)) +
    geom_point(size = 5, shape = 23) +
    doc_theme +
    ggtitle(this_title) +
scale_fill_manual(
        values = alpha(
            c(
                "grey",
                "#a6cee3",
                "#1f78b4",
                "#b2df8a",
                "#33a02c",
                "#e31a1c",
                "#ff7f00"),
            0.35)) +
    guides(
        fill = guide_legend(
            override.aes = list(shape = 23),
            order = 1))

print(plot_object)

ggsave("MDS_T2.svg", width = 6, height = 4, dpi = 600, units = "in", device='svg')
aba.cpm <-
    aba_grid_matrix %>% 
    cpm %>% 
    as_tibble(rownames = "spacer") %>% 
    pivot_longer(!spacer, names_to = "condition", values_to = "CPM") %>%
    full_join(aba_design) %>%
    mutate(rep = paste("Replicate", rep)) %>%
    group_by(verbose, timing) %>% 
    nest %>%
    mutate(
        data = map(
            data, ~.x %>% pivot_wider(id_cols = spacer, names_from = rep, values_from = CPM))) %>%
    filter(timing != "T0") %>%
    mutate(
        correlation = paste0("(", round(
            map_dbl(
                data, 
                ~cor(.$`Replicate 1`, .$`Replicate 2`)), 2), ")")) %>%
    unite("Sample Name", c("verbose", "timing", "correlation"), sep = " ")
    

aba.cpm <- aba.cpm %>%
    mutate(
        plot = map2(
            data,
            `Sample Name`,
            ~ggplot(
                .x,
                aes(x = `Replicate 1`, y = `Replicate 2`)) +
                geom_point() +
                geom_abline(intercept = 0, slope = 1, size = 0.5, colour = "red") +
                doc_theme +
                theme(legend.position = "bottom") +
                ggtitle(paste("CPM:", .y)) +
                scale_x_continuous(
                    trans = "log10", 
                    limits = c(1, pmax(max(.$`Replicate 1`), max(.$`Replicate 2`)))) +
                scale_y_continuous(
                    trans = "log10", 
                    limits = c(1, pmax(max(.$`Replicate 1`), max(.$`Replicate 2`))))))

print(aba.cpm$plot)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

aba.cpm %>% select(-plot) %>%
    filter(`Sample Name` %like% "No drug") %>%
    unnest(cols = `data`) %>% 
    ggplot(
        aes(x = `Replicate 1`, y = `Replicate 2`)) + 
    geom_point() + 
    facet_wrap(facets = "`Sample Name`") + 
    scale_x_continuous(
        trans = scales::pseudo_log_trans(base = 10),
        breaks = c(0, 10^(1:6)),
        labels = label_number_si()) +
    scale_y_continuous(
        trans = scales::pseudo_log_trans(base = 10),
        breaks = c(0, 10^(1:6)),
        labels = label_number_si()) +
    geom_abline(intercept = 0, slope = 1, size = 0.5, colour = "red") 
## Warning: `label_number_si()` was deprecated in scales 1.2.0.
## Please use the `scale_cut` argument of `label_number()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

aba.cpm %>% select(-plot) %>%
    filter(!`Sample Name` %like% "No drug") %>%
    unnest(cols = `data`) %>% 
    ggplot(
        aes(x = `Replicate 1`, y = `Replicate 2`)) + 
    geom_point() + 
    facet_wrap(facets = "`Sample Name`") + 
    scale_x_continuous(
        trans = scales::pseudo_log_trans(base = 10),
        breaks = c(0, 10^(1:6)),
        labels = label_number_si()) +
    scale_y_continuous(
        trans = scales::pseudo_log_trans(base = 10),
        breaks = c(0, 10^(1:6)),
        labels = label_number_si()) +
    geom_abline(intercept = 0, slope = 1, size = 0.5, colour = "red") 

cpm.max <- aba.cpm %>%
    unnest(data) %>%
    select(`Replicate 1`, `Replicate 2`) %>% 
    max

aba.cpm %>% 
    unnest(data) %>% 
    inner_join(aba_key) %>% 
    arrange(desc(type)) %>% 
    mutate(Type = type) %>%
    filter(`Sample Name` %like% "No drug") %>%
    ggplot(aes(x = `Replicate 1`, y = `Replicate 2`)) + 
    geom_abline(intercept = 0, slope = 1, size = 0.5, colour = "black") +
    geom_point(aes(color = Type)) + 
    scale_colour_manual(
        values =
            c("control" = alpha("grey", 0.10), 
                "mismatch" = alpha("#1F78B4", 0.10),
                "perfect" = alpha("#E31A1C", 0.5))) +
    facet_wrap(facets = c("`Sample Name`")) + 
    theme(
            legend.position = "bottom",
            legend.key = element_rect(fill = "#ECECEC")) +
    guides(colour = guide_legend(override.aes = list(alpha = 1, size = 5))) + 
    scale_x_continuous(
        trans = scales::pseudo_log_trans(base = 10),
        breaks = c(0, 10^(1:6)),
        labels = label_number(scale_cut = cut_short_scale()),
        limits = c(0, cpm.max)) +
        scale_y_continuous(
        trans = scales::pseudo_log_trans(base = 10),
        breaks = c(0, 10^(1:6)),
        labels = label_number(scale_cut = cut_short_scale()),
        limits = c(0, cpm.max)) +
    facet_wrap(facets = c("`Sample Name`")) +
    doc_theme
## Joining, by = "spacer"

aba.cpm %>% 
    unnest(data) %>% 
    inner_join(aba_key) %>% 
    arrange(desc(type)) %>% 
    mutate(Type = type) %>%
    filter(!`Sample Name` %like% "No drug") %>%
    ggplot(aes(x = `Replicate 1`, y = `Replicate 2`)) + 
    geom_abline(intercept = 0, slope = 1, size = 0.5, colour = "black") +
    geom_point(aes(color = Type)) + 
    scale_colour_manual(
        values =
            c("control" = alpha("grey", 0.15), 
                "mismatch" = alpha("#1F78B4", 0.15),
                "perfect" = alpha("#E31A1C", 0.5))) +
    facet_wrap(facets = c("`Sample Name`")) + 
    theme(
            legend.position = "bottom",
            legend.key = element_rect(fill = "#ECECEC")) +
    guides(colour = guide_legend(override.aes = list(alpha = 1, size = 5))) + 
    scale_x_continuous(
        trans = scales::pseudo_log_trans(base = 10),
        breaks = c(0, 10^(1:6)),
        labels = label_number(scale_cut = cut_short_scale()),
        limits = c(0, cpm.max)) +
        scale_y_continuous(
        trans = scales::pseudo_log_trans(base = 10),
        breaks = c(0, 10^(1:6)),
        labels = label_number(scale_cut = cut_short_scale()),
        limits = c(0, cpm.max)) +
    facet_wrap(facets = c("`Sample Name`")) +
    doc_theme
## Joining, by = "spacer"